## Ambulance Data related to acute cerebrovascular accidents
brain_data <- read_csv("data/raw/brain_data.csv") %>%
mutate(across(c(Year, Season, Month, DayOff, COVID, Sudden_Start, Dst_level), as.factor))
## Ambulance Long Data related to acute cerebrovascular accidents
brain_data_long <- read_csv("data/raw/brain_data_long.csv") %>%
mutate(across(c(Year, Season, Month, DayOff, COVID, Sudden_Start, Dst_level, Sex, Age), as.factor))
## Ambulance Data related to cardiovascular diseases
heart_data <- read_csv("data/raw/heart_data.csv") %>%
mutate(across(c(Year, Season, Month, DayOff, COVID, Sudden_Start, Dst_level), as.factor))
## Ambulance Long Data related to unstable angina
UA_data_long <- read_csv("data/raw/UA_data_long.csv") %>%
mutate(across(c(Year, Season, Month, DayOff, COVID, Sudden_Start, Dst_level, Sex, Age), as.factor))
## Ambulance Long Data related to acute myocardial infarction
AMI_data_long <- read_csv("data/raw/AMI_data_long.csv") %>%
mutate(across(c(Year, Season, Month, DayOff, COVID, Sudden_Start, Dst_level, Sex, Age), as.factor))
# Data preprocessing
brain_data_clear <- brain_data %>%
select( where(is.numeric) & !ends_with(c("min", "max", "var", "K_Sum")) ) %>%
filter(complete.cases(.))
# Function for displaying correlations with a neutral color for values close to 0
color_correlation_fixed <- function(data, mapping, ...) {
x <- eval_data_col(data, mapping$x)
y <- eval_data_col(data, mapping$y)
cor_value <- cor(x, y, use = "complete.obs")
# Defining color
color <- if (abs(cor_value) < 0.1) {
scales::alpha("gray50", 0.5) ## Gray for weak correlations
} else if (cor_value > 0) {
scales::alpha("blue", abs(cor_value)) ## Blue for positive correlations
} else {
scales::alpha("red", abs(cor_value)) ## Red for negative correlations
}
# Filter for small correlation values
cor_label <- ifelse(abs(cor_value) < 0.01, "0", sprintf("%.2f", cor_value))
ggplot(data.frame()) +
annotate("text", x = 0.5, y = 0.5, label = cor_label, size = 10, color = color ) +
theme_void()
}
# Function for trend line with a golden color
golden_smooth <- function(data, mapping, ...) {
ggplot(data, mapping) +
geom_point(alpha = 0.4, size = 0.5, color = "black") +
geom_smooth(color = "gold", ...) +
theme_custom
}
# Plot construction
ggpairs(
brain_data_clear,
progress = FALSE,
upper = list(continuous = color_correlation_fixed), ## Highlighting correlations
lower = list(continuous = golden_smooth) )+ ## Golden trend line
theme_custom +
theme(
axis.text.x = element_text(size = 10),
axis.text.y = element_text(size = 10) )
cor_data <- brain_data %>%
select( where(is.numeric) & !ends_with(c("min", "max", "Sum", "var")) ) %>%
rename(Temperature = Temp_mean, Precipitation = H2O,
`F10.7 index` = F10.7, `Ap index` = Ap_mean, `Dst index` = Dst_mean) %>%
cor(use = "pairwise.complete.obs")
corrplot(cor_data, method = 'number', type = 'lower', diag = FALSE)
network_plot(cor_data, min_cor = .1)
When analyzing the network graph, three groups of features can be identified:
Additionally, some features within the groups exhibit a high degree of correlation!
brain_data %>%
drop_na() %>%
ggplot(aes(Temp_mean, Pressure))+
geom_jitter(alpha = 0.5, colour = "royalblue2", size = 1.5)+
geom_smooth(method = "lm", se = FALSE, colour = "orangered2", linewidth = 1.2) +
stat_cor(method = "pearson", label.x.npc = 0.6, label.y.npc = 0.9, size = 7)+
labs(x = "Temperature")+
theme_custom
brain_data %>%
ggplot(aes(Sunspots, F10.7))+
geom_jitter(alpha = 0.5, colour = "royalblue2", size = 1.5)+
geom_smooth(method = "lm", se = FALSE, colour = "orangered2", linewidth = 1.2) +
stat_cor(method = "pearson", label.x.npc = 0, label.y.npc = 0.9, size = 7)+
labs(x = "Sunspot Number", y = "F10.7 index")+
theme_custom
brain_data %>%
ggplot(aes(Dst_mean, Ap_mean))+
geom_jitter(alpha = 0.5, colour = "royalblue2", size = 1.5)+
geom_smooth(method = "lm", se = FALSE, colour = "orangered2", linewidth = 1.2) +
stat_cor(method = "pearson", label.x.npc = 0.6, label.y.npc = 0.9, size = 7)+
labs(x = "Daily Average Dst index", y = "Daily Average Ap index")+
theme_custom
Based on further analysis, the following variables will be excluded:
brain_data %>%
select(!c(Pressure, F10.7, Ap_mean)) %>%
ggplot(aes(Date, Sunspots))+
geom_line(alpha = 0.3, colour = "royalblue2")+
geom_smooth(method = "gam", formula = y ~ s(x, k = 10), se = FALSE,
colour = "orangered4", linewidth = 2) +
geom_vline(xintercept = as.Date("2009-01-01"),
colour = "orangered", linewidth = 1, linetype = "dashed")+
geom_vline(xintercept = as.Date("2019-12-31"),
colour = "orangered", linewidth = 1, linetype = "dashed")+
scale_x_date(date_breaks = "2 year", date_labels = "%Y")+
labs(x = "", y = "Sunspot number")+
theme_custom
The number of sunspots is directly linked to the solar activity cycles, as confirmed by the graph. For further analysis, it is advisable to focus separately on the period from 2009 to 2019, which covers the complete 24th solar activity cycle.
Decomposition allows us to identify trends and cyclicity:
ggarrange(
brain_data %>% filter(Date >= ymd("2007-01-01")) %>% pull(Sunspots) %>%
ts(start = 2007, frequency = 365) %>% stl(s.window = "periodic") %>%
autoplot() + theme_custom + ggtitle("Sunspots"),
brain_data %>% filter(Date >= ymd("2007-01-01")) %>% pull(EMS) %>%
ts(start = 2007, frequency = 365) %>% stl(s.window = "periodic") %>%
autoplot() + theme_custom + ggtitle("EMS"),
brain_data %>% filter(Date >= ymd("2007-01-01")) %>% pull(Temp_mean) %>%
ts(start = 2007, frequency = 365) %>% stl(s.window = "periodic") %>%
autoplot() + theme_custom + ggtitle("Temperature"),
nrow = 1
)
ggarrange(
brain_data %>% filter(Date >= ymd("2007-01-01")) %>% pull(Sunspots) %>%
ts(start = 2007, frequency = 365) %>% stl(s.window = "periodic") %>%
autoplot() + theme_custom + ggtitle("Sunspots"),
brain_data %>% filter(Date >= ymd("2007-01-01")) %>% pull(Ap_mean) %>%
ts(start = 2007, frequency = 365) %>% stl(s.window = "periodic") %>%
autoplot() + theme_custom + ggtitle("Ap Index"),
brain_data %>% filter(Date >= ymd("2007-01-01")) %>% pull(Dst_var) %>%
ts(start = 2007, frequency = 365) %>% stl(s.window = "periodic") %>%
autoplot() + theme_custom + ggtitle("Dst amplitude"),
nrow = 1
)
ggarrange(
brain_data %>%
filter(Date %within% interval(ymd("2007-01-01"), ymd("2019-12-31"))) %>%
group_by(Year, Month) %>%
summarise(EMS_month = sum(EMS, na.rm = TRUE), .groups = "drop") %>%
ggplot(aes(x = Month, y = Year, fill = EMS_month)) +
geom_tile(colour = "black") +
scale_fill_gradient(low = "green", high = "red") +
scale_x_discrete(labels = month.abb) +
labs(x = "", y = "", fill = "", title = "Total Number of Emergency Calls")+
theme_custom,
brain_data %>%
filter(Date %within% interval(ymd("2007-01-01"), ymd("2019-12-31"))) %>%
group_by(Year, Month) %>%
summarise(Sunspots_month = sum(Sunspots, na.rm = TRUE), .groups = "drop") %>%
ggplot(aes(x = Month, y = Year, fill = Sunspots_month)) +
geom_tile(colour = "black") +
scale_fill_gradient(low = "green", high = "red") +
scale_x_discrete(labels = month.abb) +
labs(x = "", y = "", fill = "", title = "Total Number of Sunspots")+
theme_custom
)
brain_data %>%
filter(Dst_level != "Severe") %>%
mutate(Dst_level = fct_relevel(Dst_level, c("Quiet", "Weak", "Moderate", "Major", "Severe"))) %>%
ggplot(aes(Dst_level, Sunspots))+
geom_jitter(alpha = 0.5, size = 1, width = 0.2) +
geom_boxplot(alpha = 0.5, outliers = FALSE, fill = "goldenrod2", linewidth = 1)+
labs( x = "Geomagnetic activity level", y = "Sunspot Number" ) +
theme_custom
mean_values <- brain_data %>%
group_by(DayOff) %>%
summarise(mean_EMS = round(mean(EMS, na.rm = TRUE), 2), .groups = "drop")
ggplot(brain_data, aes(x = DayOff, y = EMS)) +
geom_boxplot(aes(fill = DayOff)) +
geom_text(data = mean_values,
aes(x = DayOff, y = mean_EMS, label = mean_EMS),
color = "black", size = 8, fontface = "bold", vjust = -1) +
scale_fill_brewer(palette = "Dark2", direction = -1)+
scale_y_continuous(limits = c(NA, max(brain_data$EMS) + 3))+
labs(y = "Number of Emergency Medical Calls", x = "Non-working Days")+
theme_custom +
theme(legend.position = "none")+
stat_pvalue_manual(t_test(data = brain_data, EMS ~ DayOff),
label = "T-test, p = {p}",
size = 10,
y.position = max(brain_data$EMS) + 0.1)
brain_data %>%
mutate(Season = fct_relevel(Season, c("Winter", "Spring", "Summer", "Autumn"))) %>%
ggplot(aes(x = Season, y = EMS, fill = Season)) +
geom_boxplot(alpha = 0.7, show.legend = FALSE) +
scale_fill_manual(
values = c("Winter" = "dodgerblue2", "Spring" = "palegreen2", "Summer" = "tan2","Autumn" = "tomato2")) +
labs(y = "Number of Emergency Medical Calls", x = "")+
theme_custom
brain_data %>%
pairwise_t_test(EMS ~ Season, p.adjust.method = "holm") %>%
transmute(`Group 1` = group1, `Group 2` = group2,
`p-value` = p, `p.adjusted` = p.adj) %>%
flextable()
Group 1 | Group 2 | p-value | p.adjusted |
|---|---|---|---|
Autumn | Spring | 0.0162 | 0.0969 |
Autumn | Summer | 0.1300 | 0.5200 |
Spring | Summer | 0.3670 | 1.0000 |
Autumn | Winter | 0.0259 | 0.1290 |
Spring | Winter | 0.8650 | 1.0000 |
Summer | Winter | 0.4670 | 1.0000 |
The distribution of emergency medical service calls across seasons is statistically insignificant (paired t-test with Holm p-value adjustment method: p.adjusted > 0.05).
brain_data %>%
drop_na(Kp_Sum, K_Sum) %>%
ggplot(aes(Kp_Sum, K_Sum))+
geom_jitter(alpha = 0.5, colour = "royalblue2", size = 1.5)+
geom_smooth(method = "lm", se = FALSE, colour = "orangered2", linewidth = 1.2) +
stat_cor(method = "pearson", label.x.npc = 0, label.y.npc = 0.9, size = 7)+
labs(x = "Daily Average Kp index", y = "Daily Average K index (IRT)")+
theme_custom
brain_data %>%
drop_na(Kp_Sum, K_Sum) %>%
pivot_longer(cols = c(Kp_Sum, K_Sum), names_to = "index") %>%
ggplot() +
geom_histogram(aes(x = value, fill = index),
binwidth = 5, colour = "black", alpha = 0.5, position = "identity", show.legend = FALSE) +
scale_fill_manual(
values = c("K_Sum" = "darkseagreen2", "Kp_Sum" = "steelblue2") )+
scale_x_continuous(breaks = seq(0, 50, 10))+
labs( x = "Index Values", y = "Observation Count", fill = "Index Type") +
theme_custom +
facet_wrap(~index, labeller = labeller(index = c("K_Sum" = "Daily Average K index (IRT)",
"Kp_Sum" = "Daily Average Kp index")))
Daily Average Kp index and Daily Average K index (IRT) show a strong correlation (r = 0.91). However, K index (IRT) has a smaller range and a more uniform distribution compared to Kp index.
ggarrange(
brain_data_long %>%
filter(between(as.numeric(as.character(Year)), 2007, 2021), Age != "55+") %>%
group_by(Year, Sex, Age) %>%
summarise(EMS_total = sum(EMS, na.rm = TRUE), .groups = "drop") %>%
mutate(EMS_plot = ifelse(Sex == "Male", -EMS_total, EMS_total)) %>%
ggplot(aes(x = Year, y = EMS_plot, fill = Sex)) +
geom_bar(stat = "identity", alpha = 0.7, colour = "black", show.legend = FALSE) +
scale_y_continuous(labels = abs, n.breaks = 10) +
scale_fill_manual(
values = c("Male" = "steelblue", "Female" = "pink"),
labels = c("Male" = "Male", "Female" = "Female") ) +
coord_flip() +
labs(
title = "Ambulance Calls Distribution by Year, Age and Gender",
x = "", y = "", fill = "Gender") +
theme_custom +
facet_wrap(. ~ Age, scales = "free_x",
labeller = labeller(Age = c("25-44" = "Age: 25-44 years",
"45-54" = "Age: 45-54 years"))),
brain_data_long %>%
filter(between(as.numeric(as.character(Year)), 2007, 2021), Age == "55+") %>%
group_by(Year, Sex, Age) %>%
summarise(EMS_total = sum(EMS, na.rm = TRUE), .groups = "drop") %>%
mutate(EMS_plot = ifelse(Sex == "Male", -EMS_total, EMS_total)) %>%
ggplot(aes(x = Year, y = EMS_plot, fill = Sex)) +
geom_bar(stat = "identity", alpha = 0.7, colour = "black", ) +
scale_y_continuous(labels = abs, n.breaks = 10) +
scale_fill_manual(
values = c("Male" = "steelblue", "Female" = "pink"),
labels = c("Male" = "Male", "Female" = "Female") ) +
coord_flip() +
labs(
x = "", y = "Total EMS", fill = "Gender") +
theme_custom + theme(legend.position = "inside", legend.justification = c(0.9, 0.2))+
facet_wrap(. ~ Age, scales = "free_x",
labeller = labeller(Age = c("55+" = "Age: 55+ years"))),
nrow = 2
)